In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

plt.style.use("ggplot")
sns.set_theme()
In [4]:
df = pd.read_csv("Crimes_-_2001_to_Present.csv")

df.head()
Out[4]:
ID Case Number Date Block IUCR Primary Type Description Location Description Arrest Domestic ... Ward Community Area FBI Code X Coordinate Y Coordinate Year Updated On Latitude Longitude Location
0 10224738 HY411648 09/05/2015 01:30:00 PM 043XX S WOOD ST 0486 BATTERY DOMESTIC BATTERY SIMPLE RESIDENCE False True ... 12.0 61.0 08B 1165074.0 1875917.0 2015 02/10/2018 03:50:01 PM 41.815117 -87.670000 (41.815117282, -87.669999562)
1 10224739 HY411615 09/04/2015 11:30:00 AM 008XX N CENTRAL AVE 0870 THEFT POCKET-PICKING CTA BUS False False ... 29.0 25.0 06 1138875.0 1904869.0 2015 02/10/2018 03:50:01 PM 41.895080 -87.765400 (41.895080471, -87.765400451)
2 11646166 JC213529 09/01/2018 12:01:00 AM 082XX S INGLESIDE AVE 0810 THEFT OVER $500 RESIDENCE False True ... 8.0 44.0 06 NaN NaN 2018 04/06/2019 04:04:43 PM NaN NaN NaN
3 10224740 HY411595 09/05/2015 12:45:00 PM 035XX W BARRY AVE 2023 NARCOTICS POSS: HEROIN(BRN/TAN) SIDEWALK True False ... 35.0 21.0 18 1152037.0 1920384.0 2015 02/10/2018 03:50:01 PM 41.937406 -87.716650 (41.937405765, -87.716649687)
4 10224741 HY411610 09/05/2015 01:00:00 PM 0000X N LARAMIE AVE 0560 ASSAULT SIMPLE APARTMENT False True ... 28.0 25.0 08A 1141706.0 1900086.0 2015 02/10/2018 03:50:01 PM 41.881903 -87.755121 (41.881903443, -87.755121152)

5 rows × 22 columns

In [29]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7784664 entries, 0 to 7784663
Data columns (total 22 columns):
 #   Column                Dtype  
---  ------                -----  
 0   ID                    int64  
 1   Case Number           object 
 2   Date                  object 
 3   Block                 object 
 4   IUCR                  object 
 5   Primary Type          object 
 6   Description           object 
 7   Location Description  object 
 8   Arrest                bool   
 9   Domestic              bool   
 10  Beat                  int64  
 11  District              float64
 12  Ward                  float64
 13  Community Area        float64
 14  FBI Code              object 
 15  X Coordinate          float64
 16  Y Coordinate          float64
 17  Year                  int64  
 18  Updated On            object 
 19  Latitude              float64
 20  Longitude             float64
 21  Location              object 
dtypes: bool(2), float64(7), int64(3), object(10)
memory usage: 1.2+ GB
In [30]:
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Hour'] = df['Date'].dt.hour
In [31]:
df = df.dropna(subset=['Latitude', 'Longitude'])
In [32]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(10,5))
top = df['Primary Type'].value_counts().head(10)

sns.barplot(x=top.index, y=top.values)
plt.title("Top 10 Crime Types in Chicago")
plt.xticks(rotation=45)
plt.ylabel("Number of Incidents")
plt.show()
In [33]:
crime_by_year = (
    df.groupby(['Year', 'Primary Type'])
      .size()
      .reset_index(name='Count')
)
crime_by_year.head()
Out[33]:
Year Primary Type Count
0 2001 ARSON 1005
1 2001 ASSAULT 31260
2 2001 BATTERY 93050
3 2001 BURGLARY 25943
4 2001 CRIM SEXUAL ASSAULT 1747
In [34]:
top10_by_year = (
    crime_by_year
    .sort_values(['Year', 'Count'], ascending=[True, False])
    .groupby('Year')
    .head(10)
)
top10_by_year.head()
Out[34]:
Year Primary Type Count
29 2001 THEFT 98447
2 2001 BATTERY 93050
5 2001 CRIMINAL DAMAGE 55590
17 2001 NARCOTICS 50318
1 2001 ASSAULT 31260
In [35]:
monthly = df.resample('M', on='Date').size()

plt.figure(figsize=(12,5))
plt.plot(monthly.index, monthly.values)
plt.title("Monthly Crime Trend in Chicago")
plt.xlabel("Year")
plt.ylabel("Crimes")
plt.show()
In [36]:
plt.figure(figsize=(8,4))
sns.countplot(x='Hour', data=df, color='purple')
plt.title("Crime Frequency by Hour of Day")
plt.show()
In [37]:
arrest_counts = df['Arrest'].value_counts()

plt.figure(figsize=(5,4))
sns.barplot(x=arrest_counts.index, y=arrest_counts.values)
plt.title("Arrests vs Non-Arrests")
plt.xticks([0,1], ['No Arrest', 'Arrest'])
plt.show()
In [38]:
district_crime = df.groupby('District').size().reset_index(name='Count')

plt.figure(figsize=(8,6))
sns.heatmap(
    district_crime.pivot_table(index='District', values='Count'),
    cmap='Reds',
    annot=True,
    fmt=".0f"
)
plt.title("Crime Count by District")
plt.show()
In [39]:
!pip install folium
Requirement already satisfied: folium in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (0.20.0)
Requirement already satisfied: branca>=0.6.0 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from folium) (0.8.2)
Requirement already satisfied: xyzservices in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from folium) (2025.11.0)
Requirement already satisfied: jinja2>=2.9 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from folium) (3.1.2)
Requirement already satisfied: numpy in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from folium) (1.23.5)
Requirement already satisfied: requests in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from folium) (2.28.1)
Requirement already satisfied: MarkupSafe>=2.0 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from jinja2>=2.9->folium) (2.1.1)
Requirement already satisfied: charset-normalizer<3,>=2 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from requests->folium) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from requests->folium) (3.4)
Requirement already satisfied: certifi>=2017.4.17 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from requests->folium) (2024.8.30)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from requests->folium) (1.26.14)
In [40]:
import folium
from folium.plugins import HeatMap

m = folium.Map(location=[41.8781, -87.6298], zoom_start=10)

heat_data = df[['Latitude', 'Longitude']].values.tolist()

HeatMap(heat_data[:50000]).add_to(m)

m
Out[40]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [41]:
m.save("chicago_crime_heatmap.html")
In [42]:
!pip install -U kaleido
Requirement already satisfied: kaleido in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (1.2.0)
Requirement already satisfied: logistro>=1.0.8 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from kaleido) (2.0.1)
Requirement already satisfied: choreographer>=1.1.1 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from kaleido) (1.2.1)
Requirement already satisfied: pytest-timeout>=2.4.0 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from kaleido) (2.4.0)
Requirement already satisfied: orjson>=3.10.15 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from kaleido) (3.11.4)
Requirement already satisfied: packaging in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from kaleido) (22.0)
Requirement already satisfied: simplejson>=3.19.3 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from choreographer>=1.1.1->kaleido) (3.20.2)
Requirement already satisfied: pytest>=7.0.0 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from pytest-timeout>=2.4.0->kaleido) (7.1.2)
Requirement already satisfied: attrs>=19.2.0 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (22.1.0)
Requirement already satisfied: iniconfig in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (1.1.1)
Requirement already satisfied: pluggy<2.0,>=0.12 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (1.0.0)
Requirement already satisfied: py>=1.8.2 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (1.11.0)
Requirement already satisfied: tomli>=1.0.0 in /Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (2.0.1)
In [5]:
df["Date"] = pd.to_datetime(df["Date"])
df["Year"] = df["Date"].dt.year

crime_by_year = (
    df.groupby(["Year", "Primary Type"])
      .size()
      .reset_index(name="Count")
)

top10_by_year = (
    crime_by_year
    .sort_values(["Year", "Count"], ascending=[True, False])
    .groupby("Year")
    .head(10)
)
top10_by_year.head()
Out[5]:
Year Primary Type Count
30 2001 THEFT 99277
2 2001 BATTERY 93460
6 2001 CRIMINAL DAMAGE 55858
18 2001 NARCOTICS 50567
1 2001 ASSAULT 31384
In [6]:
import plotly.express as px

fig = px.bar(
    top10_by_year,
    x="Primary Type",
    y="Count",
    color="Primary Type",
    animation_frame="Year",
    animation_group="Primary Type",
    title="Top 10 Crime Types in Chicago by Year",
    hover_data=["Year", "Count"],
)

fig.update_layout(
    xaxis_title="Crime Type",
    yaxis_title="Number of Incidents",
    xaxis_tickangle=45,
    showlegend=False  
)

fig.show()
/Users/willowolabi/Documents/anaconda3/lib/python3.10/site-packages/kaleido/_sync_server.py:11: UserWarning:



Warning: You have Plotly version 5.9.0, which is not compatible with this version of Kaleido (1.2.0).

This means that static image generation (e.g. `fig.write_image()`) will not work.

Please upgrade Plotly to version 6.1.1 or greater, or downgrade Kaleido to version 0.2.1.


In [7]:
years = sorted(top10_by_year["Year"].unique())

def make_year_fig(year):
    d = top10_by_year[top10_by_year["Year"] == year].sort_values("Count", ascending=False)
    return px.bar(
        d,
        x="Primary Type",
        y="Count",
        color="Primary Type",
        title=f"Top 10 Crime Types in Chicago ({year})",
        hover_data=["Count"],
    )

fig = make_year_fig(years[0])

fig.update_layout(
    updatemenus=[
        {
            "buttons": [
                {
                    "label": str(y),
                    "method": "update",
                    "args": [
                        {"x": [top10_by_year[top10_by_year["Year"] == y]["Primary Type"]],
                         "y": [top10_by_year[top10_by_year["Year"] == y]["Count"]]},
                        {"title": f"Top 10 Crime Types in Chicago ({y})"}
                    ],
                }
                for y in years
            ],
            "direction": "down",
            "showactive": True,
        }
    ]
)

fig.show()
In [ ]: